{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2de88b8c-261a-46cf-9d1d-4bcdf9e47d1d",
   "metadata": {},
   "source": [
    "# 分类算法\n",
    "\n"
   ]
  },
  {
   "cell_type": "raw",
   "id": "b875dfd6-db47-475f-afe6-801505796edc",
   "metadata": {},
   "source": [
    "考试\n",
    "快速介绍完算法\n",
    "\n",
    "目录\n",
    "kD树不讲\n",
    "数据降维：下周\n",
    "今日：第四章-分类\n",
    "最后一章聚类--分类是有监督的学习（已知标签，建模），聚类的一切都是未知的\n",
    "第七章-神经网络，之后不讲 讲多重网络（介绍CNN）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e90cb050-1181-498e-8ee2-f1dbca71a9a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression          # 逻辑回归\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report       # metrics 度量，评估\n",
    "from sklearn.metrics import confusion_matrix        # confusion_matrix  混淆矩阵\n",
    "from sklearn.metrics import roc_curve               # roc_curve\n",
    "from sklearn.metrics import auc"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01b02936-d1b0-4ae2-93e9-770dfbed31a7",
   "metadata": {},
   "source": [
    "## 混淆矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "faf15b9b-059b-4a4c-9b9b-d1820e6196ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>...</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "      <th>good_bad</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1169</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5951</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>2096</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7882</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>4870</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>53</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   checking  duration  history purpose  amount  savings  employed  installp  \\\n",
       "0         1         6        4       3    1169        5         5         4   \n",
       "1         2        48        2       3    5951        1         3         2   \n",
       "2         4        12        4       6    2096        1         4         2   \n",
       "3         1        42        2       2    7882        1         4         2   \n",
       "4         1        24        3       0    4870        1         3         3   \n",
       "\n",
       "   marital  coapp  ...  property  age  other  housing  existcr  job  depends  \\\n",
       "0        3      1  ...         1   67      3        2        2    3        1   \n",
       "1        2      1  ...         1   22      3        2        1    3        1   \n",
       "2        3      1  ...         1   49      3        2        1    2        2   \n",
       "3        3      3  ...         2   45      3        3        1    3        2   \n",
       "4        3      1  ...         4   53      3        3        2    3        2   \n",
       "\n",
       "   telephon  foreign  good_bad  \n",
       "0         2        1      good  \n",
       "1         1        1       bad  \n",
       "2         1        1      good  \n",
       "3         1        1      good  \n",
       "4         1        1       bad  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用pandas导入csv数据，查看前5行导入结果看是否正常\n",
    "import pandas as pd\n",
    "credit_df = pd.read_csv(\"d:/work/machine-learning/datasets/credit/credit.csv\")\n",
    "credit_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5dbb1584-2d5f-4641-b995-2ba9d17fd0b4",
   "metadata": {},
   "source": [
    "### 检查数据"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5ffeb0a4-c8d2-45db-8c12-67bd1058ff83",
   "metadata": {},
   "source": [
    "1. AAA.isnull().sum()\n",
    "2. AAA.info()\n",
    "3. AAA.describe()\n",
    "4. AAA[AAA.duplicated()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "80fdce7e-9554-493b-a8e2-f27cbfdc0123",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "checking    0\n",
       "duration    0\n",
       "history     0\n",
       "purpose     0\n",
       "amount      0\n",
       "savings     0\n",
       "employed    0\n",
       "installp    0\n",
       "marital     0\n",
       "coapp       0\n",
       "resident    0\n",
       "property    0\n",
       "age         0\n",
       "other       0\n",
       "housing     0\n",
       "existcr     0\n",
       "job         0\n",
       "depends     0\n",
       "telephon    0\n",
       "foreign     0\n",
       "good_bad    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计空值数据\n",
    "credit_df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "40c4c422-fe7e-49a3-9080-a1ec2f691983",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Data columns (total 21 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   checking  1000 non-null   int64 \n",
      " 1   duration  1000 non-null   int64 \n",
      " 2   history   1000 non-null   int64 \n",
      " 3   purpose   1000 non-null   object\n",
      " 4   amount    1000 non-null   int64 \n",
      " 5   savings   1000 non-null   int64 \n",
      " 6   employed  1000 non-null   int64 \n",
      " 7   installp  1000 non-null   int64 \n",
      " 8   marital   1000 non-null   int64 \n",
      " 9   coapp     1000 non-null   int64 \n",
      " 10  resident  1000 non-null   int64 \n",
      " 11  property  1000 non-null   int64 \n",
      " 12  age       1000 non-null   int64 \n",
      " 13  other     1000 non-null   int64 \n",
      " 14  housing   1000 non-null   int64 \n",
      " 15  existcr   1000 non-null   int64 \n",
      " 16  job       1000 non-null   int64 \n",
      " 17  depends   1000 non-null   int64 \n",
      " 18  telephon  1000 non-null   int64 \n",
      " 19  foreign   1000 non-null   int64 \n",
      " 20  good_bad  1000 non-null   object\n",
      "dtypes: int64(19), object(2)\n",
      "memory usage: 164.2+ KB\n"
     ]
    }
   ],
   "source": [
    "credit_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "766af6fc-c4fb-4b7b-916c-1349f9003a10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>resident</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.577000</td>\n",
       "      <td>20.903000</td>\n",
       "      <td>2.54500</td>\n",
       "      <td>3271.258000</td>\n",
       "      <td>2.105000</td>\n",
       "      <td>3.384000</td>\n",
       "      <td>2.973000</td>\n",
       "      <td>2.68200</td>\n",
       "      <td>1.145000</td>\n",
       "      <td>2.845000</td>\n",
       "      <td>2.358000</td>\n",
       "      <td>35.546000</td>\n",
       "      <td>2.675000</td>\n",
       "      <td>1.929000</td>\n",
       "      <td>1.407000</td>\n",
       "      <td>2.904000</td>\n",
       "      <td>1.155000</td>\n",
       "      <td>1.404000</td>\n",
       "      <td>1.037000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.257638</td>\n",
       "      <td>12.058814</td>\n",
       "      <td>1.08312</td>\n",
       "      <td>2822.736876</td>\n",
       "      <td>1.580023</td>\n",
       "      <td>1.208306</td>\n",
       "      <td>1.118715</td>\n",
       "      <td>0.70808</td>\n",
       "      <td>0.477706</td>\n",
       "      <td>1.103718</td>\n",
       "      <td>1.050209</td>\n",
       "      <td>11.375469</td>\n",
       "      <td>0.705601</td>\n",
       "      <td>0.531264</td>\n",
       "      <td>0.577654</td>\n",
       "      <td>0.653614</td>\n",
       "      <td>0.362086</td>\n",
       "      <td>0.490943</td>\n",
       "      <td>0.188856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>250.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1365.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>2319.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3972.250000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>18424.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          checking     duration     history        amount      savings  \\\n",
       "count  1000.000000  1000.000000  1000.00000   1000.000000  1000.000000   \n",
       "mean      2.577000    20.903000     2.54500   3271.258000     2.105000   \n",
       "std       1.257638    12.058814     1.08312   2822.736876     1.580023   \n",
       "min       1.000000     4.000000     0.00000    250.000000     1.000000   \n",
       "25%       1.000000    12.000000     2.00000   1365.500000     1.000000   \n",
       "50%       2.000000    18.000000     2.00000   2319.500000     1.000000   \n",
       "75%       4.000000    24.000000     4.00000   3972.250000     3.000000   \n",
       "max       4.000000    72.000000     4.00000  18424.000000     5.000000   \n",
       "\n",
       "          employed     installp     marital        coapp     resident  \\\n",
       "count  1000.000000  1000.000000  1000.00000  1000.000000  1000.000000   \n",
       "mean      3.384000     2.973000     2.68200     1.145000     2.845000   \n",
       "std       1.208306     1.118715     0.70808     0.477706     1.103718   \n",
       "min       1.000000     1.000000     1.00000     1.000000     1.000000   \n",
       "25%       3.000000     2.000000     2.00000     1.000000     2.000000   \n",
       "50%       3.000000     3.000000     3.00000     1.000000     3.000000   \n",
       "75%       5.000000     4.000000     3.00000     1.000000     4.000000   \n",
       "max       5.000000     4.000000     4.00000     3.000000     4.000000   \n",
       "\n",
       "          property          age        other      housing      existcr  \\\n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000   \n",
       "mean      2.358000    35.546000     2.675000     1.929000     1.407000   \n",
       "std       1.050209    11.375469     0.705601     0.531264     0.577654   \n",
       "min       1.000000    19.000000     1.000000     1.000000     1.000000   \n",
       "25%       1.000000    27.000000     3.000000     2.000000     1.000000   \n",
       "50%       2.000000    33.000000     3.000000     2.000000     1.000000   \n",
       "75%       3.000000    42.000000     3.000000     2.000000     2.000000   \n",
       "max       4.000000    75.000000     3.000000     3.000000     4.000000   \n",
       "\n",
       "               job      depends     telephon      foreign  \n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  \n",
       "mean      2.904000     1.155000     1.404000     1.037000  \n",
       "std       0.653614     0.362086     0.490943     0.188856  \n",
       "min       1.000000     1.000000     1.000000     1.000000  \n",
       "25%       3.000000     1.000000     1.000000     1.000000  \n",
       "50%       3.000000     1.000000     1.000000     1.000000  \n",
       "75%       3.000000     1.000000     2.000000     1.000000  \n",
       "max       4.000000     2.000000     2.000000     2.000000  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "9a984d43-f8f9-4742-9392-1c69de6b4da1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>...</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "      <th>good_bad</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>0 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [checking, duration, history, purpose, amount, savings, employed, installp, marital, coapp, resident, property, age, other, housing, existcr, job, depends, telephon, foreign, good_bad]\n",
       "Index: []\n",
       "\n",
       "[0 rows x 21 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 显示重复数据\n",
    "credit_df[credit_df.duplicated()]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6763958-7b86-4736-8351-e6e9aaeb5712",
   "metadata": {},
   "source": [
    "# 数据准备\n",
    "- 类别型变量进行数字编码（one-hot独热编码）\n",
    "- 数据集拆分成train和test\n",
    "\n",
    "- 准备好X_train,y_train,X_test,y_test数据对象"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b81b5edc-3c4d-4926-be1c-8796b39df59d",
   "metadata": {},
   "source": [
    "## 独热编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "18ca586f-5005-4b80-8273-6d97980adb8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      4\n",
       "1      2\n",
       "2      4\n",
       "3      2\n",
       "4      3\n",
       "      ..\n",
       "995    2\n",
       "996    2\n",
       "997    2\n",
       "998    2\n",
       "999    4\n",
       "Name: history, Length: 1000, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.history"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7b625e3d-6770-4934-9c89-93c0292f69b5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>purpose_0</th>\n",
       "      <th>purpose_1</th>\n",
       "      <th>purpose_2</th>\n",
       "      <th>purpose_3</th>\n",
       "      <th>purpose_4</th>\n",
       "      <th>purpose_5</th>\n",
       "      <th>purpose_6</th>\n",
       "      <th>purpose_8</th>\n",
       "      <th>purpose_9</th>\n",
       "      <th>purpose_X</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   purpose_0  purpose_1  purpose_2  purpose_3  purpose_4  purpose_5  \\\n",
       "0          0          0          0          1          0          0   \n",
       "1          0          0          0          1          0          0   \n",
       "2          0          0          0          0          0          0   \n",
       "3          0          0          1          0          0          0   \n",
       "4          1          0          0          0          0          0   \n",
       "\n",
       "   purpose_6  purpose_8  purpose_9  purpose_X  \n",
       "0          0          0          0          0  \n",
       "1          0          0          0          0  \n",
       "2          1          0          0          0  \n",
       "3          0          0          0          0  \n",
       "4          0          0          0          0  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# prefix 列标签\n",
    "# 把类别型变量进行独热编码(1->N)\n",
    "checking = pd.get_dummies(credit_df.checking,prefix='checking')\n",
    "history = pd.get_dummies(credit_df.history,prefix='history')\n",
    "purpose = pd.get_dummies(credit_df.purpose,prefix='purpose')\n",
    "savings = pd.get_dummies(credit_df.savings,prefix='savings')\n",
    "employed = pd.get_dummies(credit_df.employed,prefix='employed')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "marital = pd.get_dummies(credit_df.marital,prefix='marital')\n",
    "coapp = pd.get_dummies(credit_df.coapp,prefix='coapp')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "resident = pd.get_dummies(credit_df.resident,prefix='resident')\n",
    "property = pd.get_dummies(credit_df.property,prefix='property')\n",
    "housing = pd.get_dummies(credit_df.housing,prefix='housing')\n",
    "existcr = pd.get_dummies(credit_df.existcr,prefix='existcr')\n",
    "job = pd.get_dummies(credit_df.job,prefix='job')\n",
    "depends = pd.get_dummies(credit_df.depends,prefix='depends')\n",
    "telephon = pd.get_dummies(credit_df.telephon,prefix='telephon')\n",
    "foreign = pd.get_dummies(credit_df.foreign,prefix='foreign')\n",
    "\n",
    "# 随便查看一个编码后的数据\n",
    "purpose.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a5e1286d-5f07-49a7-938f-9ead925fb1ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>amount</th>\n",
       "      <th>age</th>\n",
       "      <th>checking_1</th>\n",
       "      <th>checking_2</th>\n",
       "      <th>checking_3</th>\n",
       "      <th>checking_4</th>\n",
       "      <th>history_0</th>\n",
       "      <th>history_1</th>\n",
       "      <th>history_2</th>\n",
       "      <th>...</th>\n",
       "      <th>job_1</th>\n",
       "      <th>job_2</th>\n",
       "      <th>job_3</th>\n",
       "      <th>job_4</th>\n",
       "      <th>depends_1</th>\n",
       "      <th>depends_2</th>\n",
       "      <th>telephon_1</th>\n",
       "      <th>telephon_2</th>\n",
       "      <th>foreign_1</th>\n",
       "      <th>foreign_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>1169</td>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>48</td>\n",
       "      <td>5951</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>2096</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>42</td>\n",
       "      <td>7882</td>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>4870</td>\n",
       "      <td>53</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 72 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration  amount  age  checking_1  checking_2  checking_3  checking_4  \\\n",
       "0         6    1169   67           1           0           0           0   \n",
       "1        48    5951   22           0           1           0           0   \n",
       "2        12    2096   49           0           0           0           1   \n",
       "3        42    7882   45           1           0           0           0   \n",
       "4        24    4870   53           1           0           0           0   \n",
       "\n",
       "   history_0  history_1  history_2  ...  job_1  job_2  job_3  job_4  \\\n",
       "0          0          0          0  ...      0      0      1      0   \n",
       "1          0          0          1  ...      0      0      1      0   \n",
       "2          0          0          0  ...      0      1      0      0   \n",
       "3          0          0          1  ...      0      0      1      0   \n",
       "4          0          0          0  ...      0      0      1      0   \n",
       "\n",
       "   depends_1  depends_2  telephon_1  telephon_2  foreign_1  foreign_2  \n",
       "0          1          0           0           1          1          0  \n",
       "1          1          0           1           0          1          0  \n",
       "2          0          1           1           0          1          0  \n",
       "3          0          1           1           0          1          0  \n",
       "4          0          1           1           0          1          0  \n",
       "\n",
       "[5 rows x 72 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用concat()函数把转换后的数据对象全部合并在一起变成新的数据对象，命名为trainData_X\n",
    "trainData_X = pd.concat([credit_df.duration, credit_df.amount, credit_df.age, checking, history, purpose, savings,\n",
    "                         employed, installp, marital, coapp, installp, resident, property, housing, existcr, job, \n",
    "                         depends, telephon, foreign], axis=1)\n",
    "trainData_X.head()\n",
    "# 72 columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f430071f-b72f-40c4-a02b-d081256d1cd8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    700\n",
       "1    300\n",
       "Name: target, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构造目标变量y的数据对象\n",
    "# 当前的目标变量good_bad的取值是'good'和'bad'，一般我们习惯把二分类的类别值编码为0和1，1一般表示类别比较少的那一类，他们往往是我们感兴趣的\n",
    "credit_df['target'] = 0\n",
    "credit_df.loc[(credit_df.good_bad == 'bad'), 'target'] = 1\n",
    "\n",
    "trainData_y = credit_df['target'] \n",
    "# 数值统计\n",
    "trainData_y.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a8b76207-7d03-46a6-8611-b153da541b50",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train,X_test,y_train,y_test  =  train_test_split(trainData_X, trainData_y,test_size=0.3, random_state=123456)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e16ab007-d7b3-4617-991d-958d06ca62b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(700, 72)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape of the training feature matrix as (rows, columns).\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e446ad10-ccce-461c-8dc2-a6942e51110d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(solver='liblinear')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# 通过LogisticRegression类定义一个逻辑回归模型名字叫lr\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "\n",
    "# 对lr模型进行训练(fit)\n",
    "lr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "df1cb834-24c6-4e5d-9a79-776c3bece0ef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-0.29030647]\n",
      "[[ 2.78585736e-02  1.11255018e-04 -1.29746209e-02  7.31943927e-01\n",
      "   1.91070645e-01 -1.46569430e-01 -1.06675161e+00  5.46365447e-01\n",
      "   6.85169220e-01 -1.81258020e-01 -4.99406482e-01 -8.41176632e-01\n",
      "   5.85532800e-01 -1.00297822e+00 -5.20971408e-01 -3.17886803e-01\n",
      "   8.54588303e-02  4.53774142e-01  9.48276612e-01 -4.33124067e-02\n",
      "  -2.77159947e-01 -2.01040067e-01  4.16169483e-01  2.53944628e-01\n",
      "  -2.05956058e-02 -4.57369415e-01 -4.82455556e-01 -1.69238670e-03\n",
      "   1.62232686e-01  7.25358877e-03 -4.26733347e-01 -3.13670082e-02\n",
      "  -3.32915062e-01 -1.56215186e-01  1.00293245e-01  9.85305354e-02\n",
      "   2.37317408e-01  6.55382919e-02 -4.67081357e-01 -1.26080809e-01\n",
      "  -8.17753161e-02  3.84350573e-01 -5.92881724e-01 -3.32915062e-01\n",
      "  -1.56215186e-01  1.00293245e-01  9.85305354e-02 -4.74275118e-01\n",
      "   3.79368176e-01 -2.03612554e-02 -1.75038270e-01 -2.48206378e-01\n",
      "   1.61431950e-02 -2.54092858e-02 -3.28339981e-02  2.17765790e-01\n",
      "  -3.78834462e-01 -1.29237795e-01 -3.52735995e-01 -1.89624910e-02\n",
      "  -1.52618282e-01  2.34010300e-01 -5.46775919e-02 -1.47178592e-01\n",
      "   7.43240453e-02 -1.62774329e-01 -3.57151088e-01  6.68446209e-02\n",
      "   2.37965151e-02 -3.14102982e-01  5.58298124e-01 -8.48604591e-01]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[0.02785857357061813]</td>\n",
       "      <td>duration</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[0.00011125501772760715]</td>\n",
       "      <td>amount</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.01297462091704518]</td>\n",
       "      <td>age</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0.7319439273593329]</td>\n",
       "      <td>checking_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0.1910706446593187]</td>\n",
       "      <td>checking_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>[0.06684462089753095]</td>\n",
       "      <td>depends_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>[0.02379651513622192]</td>\n",
       "      <td>telephon_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>[-0.31410298209933957]</td>\n",
       "      <td>telephon_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>[0.5582981244841964]</td>\n",
       "      <td>foreign_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>[-0.8486045914473064]</td>\n",
       "      <td>foreign_2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>72 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        coef     columns\n",
       "0      [0.02785857357061813]    duration\n",
       "1   [0.00011125501772760715]      amount\n",
       "2     [-0.01297462091704518]         age\n",
       "3       [0.7319439273593329]  checking_1\n",
       "4       [0.1910706446593187]  checking_2\n",
       "..                       ...         ...\n",
       "67     [0.06684462089753095]   depends_2\n",
       "68     [0.02379651513622192]  telephon_1\n",
       "69    [-0.31410298209933957]  telephon_2\n",
       "70      [0.5582981244841964]   foreign_1\n",
       "71     [-0.8486045914473064]   foreign_2\n",
       "\n",
       "[72 rows x 2 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# 查看模型结果\n",
    "print(lr.intercept_ )\n",
    "print(lr.coef_)\n",
    "\n",
    "# 把变量名称和系数对应在一起方便查看\n",
    "pd.DataFrame(list(zip(np.transpose(lr.coef_), X_train.columns)), columns=['coef', 'columns'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eaf887c0-2a4a-4263-b112-c33658243e49",
   "metadata": {},
   "source": [
    "# 模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d10ff75a-5024-4813-bd10-2a559fd14956",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.82      0.85      0.83       210\n",
      "           1       0.61      0.57      0.59        90\n",
      "\n",
      "    accuracy                           0.76       300\n",
      "   macro avg       0.72      0.71      0.71       300\n",
      "weighted avg       0.76      0.76      0.76       300\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = lr.predict(X_test)\n",
    "y_test_prob = lr.predict_proba(X_test)\n",
    "\n",
    "# 分类评估汇总报告classification_report\n",
    "print(classification_report(y_test,y_test_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c7817ce7-ece5-4dd7-9911-247875add3bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[178  32]\n",
      " [ 39  51]]\n"
     ]
    }
   ],
   "source": [
    "# 纵向为真值，水平项为预测值\n",
    "# 混淆矩阵 confusion_matrix\n",
    "print(confusion_matrix(y_test,y_test_pred))"
   ]
  },
  {
   "cell_type": "raw",
   "id": "b1928196-2834-4585-8014-832238b8aaa3",
   "metadata": {},
   "source": [
    "          预测0  预测1\n",
    "真实0     178     32         TN   FP\n",
    "真实1      39     51         FN   TP\n",
    "\n",
    "（以 target=1 即 bad 为正例；行为真实类别，列为预测类别）\n",
    "\n",
    "真正例（True Positive, TP）、假正例（False Positive, FP）、真负例（True Negative, TN）和假负例（False Negative, FN）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00e4f501-6457-4395-9cff-7c8e087e03d0",
   "metadata": {},
   "source": [
    "1. 数据准备 （读入数据，检查数据，独热，检查）\n",
    "2. 构建数据集（train_test）\n",
    "3. 建模\n",
    "4. 评价"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6eeb9fe-89a6-48b0-b1ee-6ef9c6ae22e7",
   "metadata": {},
   "source": [
    "# 调参"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4284b934-d717-441d-a947-eee80c56cc4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression(C=1, solver='liblinear')\n",
      "0.7614285714285713\n",
      "{'C': 1, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "# grid search调参\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "parameters = {\n",
    "    'penalty': ('l1', 'l2'),\n",
    "    'C': (0.01, 0.1, 1, 10),\n",
    "}\n",
    "\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "lr_search = GridSearchCV(lr, parameters, scoring='accuracy', cv=5)\n",
    "lr_search.fit(X_train, y_train)\n",
    "\n",
    "#查看最佳结果\n",
    "print(lr_search.best_estimator_)\n",
    "print(lr_search.best_score_)\n",
    "print(lr_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb09cb7d-4648-453c-86c6-d2a2a6143df3",
   "metadata": {},
   "source": [
    "# 作业（20min）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "d7c78baa-560b-48c5-aa59-3afa2a717c8b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>situation</th>\n",
       "      <th>column_1</th>\n",
       "      <th>column_2</th>\n",
       "      <th>column_3</th>\n",
       "      <th>column_4</th>\n",
       "      <th>column_5</th>\n",
       "      <th>column_6</th>\n",
       "      <th>column_7</th>\n",
       "      <th>column_8</th>\n",
       "      <th>...</th>\n",
       "      <th>column_21</th>\n",
       "      <th>column_22</th>\n",
       "      <th>column_23</th>\n",
       "      <th>column_24</th>\n",
       "      <th>column_25</th>\n",
       "      <th>column_26</th>\n",
       "      <th>column_27</th>\n",
       "      <th>column_28</th>\n",
       "      <th>column_29</th>\n",
       "      <th>column_30</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>842302</td>\n",
       "      <td>M</td>\n",
       "      <td>17.99</td>\n",
       "      <td>10.38</td>\n",
       "      <td>122.80</td>\n",
       "      <td>1001.0</td>\n",
       "      <td>0.11840</td>\n",
       "      <td>0.27760</td>\n",
       "      <td>0.3001</td>\n",
       "      <td>0.14710</td>\n",
       "      <td>...</td>\n",
       "      <td>25.38</td>\n",
       "      <td>17.33</td>\n",
       "      <td>184.60</td>\n",
       "      <td>2019.0</td>\n",
       "      <td>0.1622</td>\n",
       "      <td>0.6656</td>\n",
       "      <td>0.7119</td>\n",
       "      <td>0.2654</td>\n",
       "      <td>0.4601</td>\n",
       "      <td>0.11890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>842517</td>\n",
       "      <td>M</td>\n",
       "      <td>20.57</td>\n",
       "      <td>17.77</td>\n",
       "      <td>132.90</td>\n",
       "      <td>1326.0</td>\n",
       "      <td>0.08474</td>\n",
       "      <td>0.07864</td>\n",
       "      <td>0.0869</td>\n",
       "      <td>0.07017</td>\n",
       "      <td>...</td>\n",
       "      <td>24.99</td>\n",
       "      <td>23.41</td>\n",
       "      <td>158.80</td>\n",
       "      <td>1956.0</td>\n",
       "      <td>0.1238</td>\n",
       "      <td>0.1866</td>\n",
       "      <td>0.2416</td>\n",
       "      <td>0.1860</td>\n",
       "      <td>0.2750</td>\n",
       "      <td>0.08902</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>84300903</td>\n",
       "      <td>M</td>\n",
       "      <td>19.69</td>\n",
       "      <td>21.25</td>\n",
       "      <td>130.00</td>\n",
       "      <td>1203.0</td>\n",
       "      <td>0.10960</td>\n",
       "      <td>0.15990</td>\n",
       "      <td>0.1974</td>\n",
       "      <td>0.12790</td>\n",
       "      <td>...</td>\n",
       "      <td>23.57</td>\n",
       "      <td>25.53</td>\n",
       "      <td>152.50</td>\n",
       "      <td>1709.0</td>\n",
       "      <td>0.1444</td>\n",
       "      <td>0.4245</td>\n",
       "      <td>0.4504</td>\n",
       "      <td>0.2430</td>\n",
       "      <td>0.3613</td>\n",
       "      <td>0.08758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>84348301</td>\n",
       "      <td>M</td>\n",
       "      <td>11.42</td>\n",
       "      <td>20.38</td>\n",
       "      <td>77.58</td>\n",
       "      <td>386.1</td>\n",
       "      <td>0.14250</td>\n",
       "      <td>0.28390</td>\n",
       "      <td>0.2414</td>\n",
       "      <td>0.10520</td>\n",
       "      <td>...</td>\n",
       "      <td>14.91</td>\n",
       "      <td>26.50</td>\n",
       "      <td>98.87</td>\n",
       "      <td>567.7</td>\n",
       "      <td>0.2098</td>\n",
       "      <td>0.8663</td>\n",
       "      <td>0.6869</td>\n",
       "      <td>0.2575</td>\n",
       "      <td>0.6638</td>\n",
       "      <td>0.17300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>84358402</td>\n",
       "      <td>M</td>\n",
       "      <td>20.29</td>\n",
       "      <td>14.34</td>\n",
       "      <td>135.10</td>\n",
       "      <td>1297.0</td>\n",
       "      <td>0.10030</td>\n",
       "      <td>0.13280</td>\n",
       "      <td>0.1980</td>\n",
       "      <td>0.10430</td>\n",
       "      <td>...</td>\n",
       "      <td>22.54</td>\n",
       "      <td>16.67</td>\n",
       "      <td>152.20</td>\n",
       "      <td>1575.0</td>\n",
       "      <td>0.1374</td>\n",
       "      <td>0.2050</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.1625</td>\n",
       "      <td>0.2364</td>\n",
       "      <td>0.07678</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         ID situation  column_1  column_2  column_3  column_4  column_5  \\\n",
       "0    842302         M     17.99     10.38    122.80    1001.0   0.11840   \n",
       "1    842517         M     20.57     17.77    132.90    1326.0   0.08474   \n",
       "2  84300903         M     19.69     21.25    130.00    1203.0   0.10960   \n",
       "3  84348301         M     11.42     20.38     77.58     386.1   0.14250   \n",
       "4  84358402         M     20.29     14.34    135.10    1297.0   0.10030   \n",
       "\n",
       "   column_6  column_7  column_8  ...  column_21  column_22  column_23  \\\n",
       "0   0.27760    0.3001   0.14710  ...      25.38      17.33     184.60   \n",
       "1   0.07864    0.0869   0.07017  ...      24.99      23.41     158.80   \n",
       "2   0.15990    0.1974   0.12790  ...      23.57      25.53     152.50   \n",
       "3   0.28390    0.2414   0.10520  ...      14.91      26.50      98.87   \n",
       "4   0.13280    0.1980   0.10430  ...      22.54      16.67     152.20   \n",
       "\n",
       "   column_24  column_25  column_26  column_27  column_28  column_29  column_30  \n",
       "0     2019.0     0.1622     0.6656     0.7119     0.2654     0.4601    0.11890  \n",
       "1     1956.0     0.1238     0.1866     0.2416     0.1860     0.2750    0.08902  \n",
       "2     1709.0     0.1444     0.4245     0.4504     0.2430     0.3613    0.08758  \n",
       "3      567.7     0.2098     0.8663     0.6869     0.2575     0.6638    0.17300  \n",
       "4     1575.0     0.1374     0.2050     0.4000     0.1625     0.2364    0.07678  \n",
       "\n",
       "[5 rows x 32 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用pandas导入csv数据，查看前5行导入结果看是否正常\n",
    "import pandas as pd\n",
    "credit_df = pd.read_csv(\"D:/work/homework/wdbcdata.txt\", header=None)\n",
    "#源DataFrame中是没有索引的，此处加上索引\n",
    "new_column_names =['ID','situation']+ [f'column_{i}' for i in range(1,31)]\n",
    "credit_df.columns = new_column_names\n",
    "credit_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "540e1975-04ad-4ae3-9445-c123595275e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "credit_df.drop(['ID'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ac832283-9ca9-4202-9e86-5111a7126758",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "B    357\n",
       "M    212\n",
       "Name: situation, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many rows fall in each `situation` class.\n",
    "credit_df['situation'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "301c36fa-b616-4134-b5d9-425c75908740",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "situation    0\n",
       "column_1     0\n",
       "column_2     0\n",
       "column_3     0\n",
       "column_4     0\n",
       "column_5     0\n",
       "column_6     0\n",
       "column_7     0\n",
       "column_8     0\n",
       "column_9     0\n",
       "column_10    0\n",
       "column_11    0\n",
       "column_12    0\n",
       "column_13    0\n",
       "column_14    0\n",
       "column_15    0\n",
       "column_16    0\n",
       "column_17    0\n",
       "column_18    0\n",
       "column_19    0\n",
       "column_20    0\n",
       "column_21    0\n",
       "column_22    0\n",
       "column_23    0\n",
       "column_24    0\n",
       "column_25    0\n",
       "column_26    0\n",
       "column_27    0\n",
       "column_28    0\n",
       "column_29    0\n",
       "column_30    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the missing values in each column\n",
    "credit_df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2f10cb84-79f1-447f-a8e3-60548e9e7e6b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 569 entries, 0 to 568\n",
      "Data columns (total 31 columns):\n",
      " #   Column     Non-Null Count  Dtype  \n",
      "---  ------     --------------  -----  \n",
      " 0   situation  569 non-null    object \n",
      " 1   column_1   569 non-null    float64\n",
      " 2   column_2   569 non-null    float64\n",
      " 3   column_3   569 non-null    float64\n",
      " 4   column_4   569 non-null    float64\n",
      " 5   column_5   569 non-null    float64\n",
      " 6   column_6   569 non-null    float64\n",
      " 7   column_7   569 non-null    float64\n",
      " 8   column_8   569 non-null    float64\n",
      " 9   column_9   569 non-null    float64\n",
      " 10  column_10  569 non-null    float64\n",
      " 11  column_11  569 non-null    float64\n",
      " 12  column_12  569 non-null    float64\n",
      " 13  column_13  569 non-null    float64\n",
      " 14  column_14  569 non-null    float64\n",
      " 15  column_15  569 non-null    float64\n",
      " 16  column_16  569 non-null    float64\n",
      " 17  column_17  569 non-null    float64\n",
      " 18  column_18  569 non-null    float64\n",
      " 19  column_19  569 non-null    float64\n",
      " 20  column_20  569 non-null    float64\n",
      " 21  column_21  569 non-null    float64\n",
      " 22  column_22  569 non-null    float64\n",
      " 23  column_23  569 non-null    float64\n",
      " 24  column_24  569 non-null    float64\n",
      " 25  column_25  569 non-null    float64\n",
      " 26  column_26  569 non-null    float64\n",
      " 27  column_27  569 non-null    float64\n",
      " 28  column_28  569 non-null    float64\n",
      " 29  column_29  569 non-null    float64\n",
      " 30  column_30  569 non-null    float64\n",
      "dtypes: float64(30), object(1)\n",
      "memory usage: 137.9+ KB\n"
     ]
    }
   ],
   "source": [
    "credit_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "0d8b2ddc-1092-4a79-b39b-fe7fc74333d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>column_1</th>\n",
       "      <th>column_2</th>\n",
       "      <th>column_3</th>\n",
       "      <th>column_4</th>\n",
       "      <th>column_5</th>\n",
       "      <th>column_6</th>\n",
       "      <th>column_7</th>\n",
       "      <th>column_8</th>\n",
       "      <th>column_9</th>\n",
       "      <th>column_10</th>\n",
       "      <th>...</th>\n",
       "      <th>column_21</th>\n",
       "      <th>column_22</th>\n",
       "      <th>column_23</th>\n",
       "      <th>column_24</th>\n",
       "      <th>column_25</th>\n",
       "      <th>column_26</th>\n",
       "      <th>column_27</th>\n",
       "      <th>column_28</th>\n",
       "      <th>column_29</th>\n",
       "      <th>column_30</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "      <td>569.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>14.127292</td>\n",
       "      <td>19.289649</td>\n",
       "      <td>91.969033</td>\n",
       "      <td>654.889104</td>\n",
       "      <td>0.096360</td>\n",
       "      <td>0.104341</td>\n",
       "      <td>0.088799</td>\n",
       "      <td>0.048919</td>\n",
       "      <td>0.181162</td>\n",
       "      <td>0.062798</td>\n",
       "      <td>...</td>\n",
       "      <td>16.269190</td>\n",
       "      <td>25.677223</td>\n",
       "      <td>107.261213</td>\n",
       "      <td>880.583128</td>\n",
       "      <td>0.132369</td>\n",
       "      <td>0.254265</td>\n",
       "      <td>0.272188</td>\n",
       "      <td>0.114606</td>\n",
       "      <td>0.290076</td>\n",
       "      <td>0.083946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.524049</td>\n",
       "      <td>4.301036</td>\n",
       "      <td>24.298981</td>\n",
       "      <td>351.914129</td>\n",
       "      <td>0.014064</td>\n",
       "      <td>0.052813</td>\n",
       "      <td>0.079720</td>\n",
       "      <td>0.038803</td>\n",
       "      <td>0.027414</td>\n",
       "      <td>0.007060</td>\n",
       "      <td>...</td>\n",
       "      <td>4.833242</td>\n",
       "      <td>6.146258</td>\n",
       "      <td>33.602542</td>\n",
       "      <td>569.356993</td>\n",
       "      <td>0.022832</td>\n",
       "      <td>0.157336</td>\n",
       "      <td>0.208624</td>\n",
       "      <td>0.065732</td>\n",
       "      <td>0.061867</td>\n",
       "      <td>0.018061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>6.981000</td>\n",
       "      <td>9.710000</td>\n",
       "      <td>43.790000</td>\n",
       "      <td>143.500000</td>\n",
       "      <td>0.052630</td>\n",
       "      <td>0.019380</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.106000</td>\n",
       "      <td>0.049960</td>\n",
       "      <td>...</td>\n",
       "      <td>7.930000</td>\n",
       "      <td>12.020000</td>\n",
       "      <td>50.410000</td>\n",
       "      <td>185.200000</td>\n",
       "      <td>0.071170</td>\n",
       "      <td>0.027290</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.156500</td>\n",
       "      <td>0.055040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>11.700000</td>\n",
       "      <td>16.170000</td>\n",
       "      <td>75.170000</td>\n",
       "      <td>420.300000</td>\n",
       "      <td>0.086370</td>\n",
       "      <td>0.064920</td>\n",
       "      <td>0.029560</td>\n",
       "      <td>0.020310</td>\n",
       "      <td>0.161900</td>\n",
       "      <td>0.057700</td>\n",
       "      <td>...</td>\n",
       "      <td>13.010000</td>\n",
       "      <td>21.080000</td>\n",
       "      <td>84.110000</td>\n",
       "      <td>515.300000</td>\n",
       "      <td>0.116600</td>\n",
       "      <td>0.147200</td>\n",
       "      <td>0.114500</td>\n",
       "      <td>0.064930</td>\n",
       "      <td>0.250400</td>\n",
       "      <td>0.071460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>13.370000</td>\n",
       "      <td>18.840000</td>\n",
       "      <td>86.240000</td>\n",
       "      <td>551.100000</td>\n",
       "      <td>0.095870</td>\n",
       "      <td>0.092630</td>\n",
       "      <td>0.061540</td>\n",
       "      <td>0.033500</td>\n",
       "      <td>0.179200</td>\n",
       "      <td>0.061540</td>\n",
       "      <td>...</td>\n",
       "      <td>14.970000</td>\n",
       "      <td>25.410000</td>\n",
       "      <td>97.660000</td>\n",
       "      <td>686.500000</td>\n",
       "      <td>0.131300</td>\n",
       "      <td>0.211900</td>\n",
       "      <td>0.226700</td>\n",
       "      <td>0.099930</td>\n",
       "      <td>0.282200</td>\n",
       "      <td>0.080040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>15.780000</td>\n",
       "      <td>21.800000</td>\n",
       "      <td>104.100000</td>\n",
       "      <td>782.700000</td>\n",
       "      <td>0.105300</td>\n",
       "      <td>0.130400</td>\n",
       "      <td>0.130700</td>\n",
       "      <td>0.074000</td>\n",
       "      <td>0.195700</td>\n",
       "      <td>0.066120</td>\n",
       "      <td>...</td>\n",
       "      <td>18.790000</td>\n",
       "      <td>29.720000</td>\n",
       "      <td>125.400000</td>\n",
       "      <td>1084.000000</td>\n",
       "      <td>0.146000</td>\n",
       "      <td>0.339100</td>\n",
       "      <td>0.382900</td>\n",
       "      <td>0.161400</td>\n",
       "      <td>0.317900</td>\n",
       "      <td>0.092080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>28.110000</td>\n",
       "      <td>39.280000</td>\n",
       "      <td>188.500000</td>\n",
       "      <td>2501.000000</td>\n",
       "      <td>0.163400</td>\n",
       "      <td>0.345400</td>\n",
       "      <td>0.426800</td>\n",
       "      <td>0.201200</td>\n",
       "      <td>0.304000</td>\n",
       "      <td>0.097440</td>\n",
       "      <td>...</td>\n",
       "      <td>36.040000</td>\n",
       "      <td>49.540000</td>\n",
       "      <td>251.200000</td>\n",
       "      <td>4254.000000</td>\n",
       "      <td>0.222600</td>\n",
       "      <td>1.058000</td>\n",
       "      <td>1.252000</td>\n",
       "      <td>0.291000</td>\n",
       "      <td>0.663800</td>\n",
       "      <td>0.207500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         column_1    column_2    column_3     column_4    column_5  \\\n",
       "count  569.000000  569.000000  569.000000   569.000000  569.000000   \n",
       "mean    14.127292   19.289649   91.969033   654.889104    0.096360   \n",
       "std      3.524049    4.301036   24.298981   351.914129    0.014064   \n",
       "min      6.981000    9.710000   43.790000   143.500000    0.052630   \n",
       "25%     11.700000   16.170000   75.170000   420.300000    0.086370   \n",
       "50%     13.370000   18.840000   86.240000   551.100000    0.095870   \n",
       "75%     15.780000   21.800000  104.100000   782.700000    0.105300   \n",
       "max     28.110000   39.280000  188.500000  2501.000000    0.163400   \n",
       "\n",
       "         column_6    column_7    column_8    column_9   column_10  ...  \\\n",
       "count  569.000000  569.000000  569.000000  569.000000  569.000000  ...   \n",
       "mean     0.104341    0.088799    0.048919    0.181162    0.062798  ...   \n",
       "std      0.052813    0.079720    0.038803    0.027414    0.007060  ...   \n",
       "min      0.019380    0.000000    0.000000    0.106000    0.049960  ...   \n",
       "25%      0.064920    0.029560    0.020310    0.161900    0.057700  ...   \n",
       "50%      0.092630    0.061540    0.033500    0.179200    0.061540  ...   \n",
       "75%      0.130400    0.130700    0.074000    0.195700    0.066120  ...   \n",
       "max      0.345400    0.426800    0.201200    0.304000    0.097440  ...   \n",
       "\n",
       "        column_21   column_22   column_23    column_24   column_25  \\\n",
       "count  569.000000  569.000000  569.000000   569.000000  569.000000   \n",
       "mean    16.269190   25.677223  107.261213   880.583128    0.132369   \n",
       "std      4.833242    6.146258   33.602542   569.356993    0.022832   \n",
       "min      7.930000   12.020000   50.410000   185.200000    0.071170   \n",
       "25%     13.010000   21.080000   84.110000   515.300000    0.116600   \n",
       "50%     14.970000   25.410000   97.660000   686.500000    0.131300   \n",
       "75%     18.790000   29.720000  125.400000  1084.000000    0.146000   \n",
       "max     36.040000   49.540000  251.200000  4254.000000    0.222600   \n",
       "\n",
       "        column_26   column_27   column_28   column_29   column_30  \n",
       "count  569.000000  569.000000  569.000000  569.000000  569.000000  \n",
       "mean     0.254265    0.272188    0.114606    0.290076    0.083946  \n",
       "std      0.157336    0.208624    0.065732    0.061867    0.018061  \n",
       "min      0.027290    0.000000    0.000000    0.156500    0.055040  \n",
       "25%      0.147200    0.114500    0.064930    0.250400    0.071460  \n",
       "50%      0.211900    0.226700    0.099930    0.282200    0.080040  \n",
       "75%      0.339100    0.382900    0.161400    0.317900    0.092080  \n",
       "max      1.058000    1.252000    0.291000    0.663800    0.207500  \n",
       "\n",
       "[8 rows x 30 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "9213348f-7385-42a1-ae02-2d6db28efe1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the binary target: M (malignant) -> 1, B (benign) -> 0\n",
    "credit_df['target'] = (credit_df['situation'] == 'M').astype('int64')\n",
    "\n",
    "trainData_y = credit_df['target']\n",
    "\n",
    "# Feature matrix: drop the raw label AND the derived target. Leaving 'target' in X\n",
    "# leaks the answer into the model (it would be trained on the very value it predicts).\n",
    "trainData_X = credit_df.drop(columns=['situation', 'target'])\n",
    "\n",
    "# Class balance of the target (kept as the last expression so the notebook renders it)\n",
    "trainData_y.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a87923a-904b-4d4c-a6ef-5b8820f2d1dc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "ad8652f0-da8e-46de-a4cd-2932d4e5909d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 30% of the rows as a test set; the fixed random_state keeps the split reproducible\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    trainData_X,\n",
    "    trainData_y,\n",
    "    test_size=0.3,\n",
    "    random_state=123456,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "f02d27d4-6349-4217-b4ba-8eeada194f83",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(398, 31)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "eb71db1e-81ec-4d38-9582-06a716c6c62b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(solver='liblinear')"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Create a logistic-regression model named lr (liblinear solver) and train it on the\n",
    "# training split; fit() hands back the estimator itself, so lr is the fitted model\n",
    "lr = LogisticRegression(solver='liblinear').fit(X_train, y_train)\n",
    "\n",
    "# Show the fitted estimator as the cell result\n",
    "lr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "44659863-1588-49cd-8f65-426699c3b5cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-0.18005811]\n",
      "[[-9.32299582e-01  4.04294858e-02 -1.28009781e-01  4.78030359e-03\n",
      "   5.24259706e-02  1.70182433e-01  2.40609480e-01  1.21369687e-01\n",
      "   4.66302053e-02  1.16305258e-02 -6.17648065e-03 -2.40453634e-01\n",
      "   9.89746159e-02  4.78715666e-02  4.83611431e-03  1.35871268e-02\n",
      "   2.79077526e-02  1.39694060e-02  8.99405618e-03  2.70454736e-04\n",
      "  -8.06736576e-01  8.95922964e-02  1.10428831e-01  2.00850074e-02\n",
      "   8.73805197e-02  4.54636980e-01  5.71062232e-01  2.15663377e-01\n",
      "   1.46592177e-01  4.45862105e-02  3.98257853e+00]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[-0.9322995823945017]</td>\n",
       "      <td>column_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[0.04042948575978604]</td>\n",
       "      <td>column_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.128009780647603]</td>\n",
       "      <td>column_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0.004780303590661737]</td>\n",
       "      <td>column_4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0.052425970630658396]</td>\n",
       "      <td>column_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[0.17018243326356014]</td>\n",
       "      <td>column_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[0.24060948019310577]</td>\n",
       "      <td>column_7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[0.12136968739178541]</td>\n",
       "      <td>column_8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[0.04663020525891181]</td>\n",
       "      <td>column_9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[0.011630525800464676]</td>\n",
       "      <td>column_10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[-0.00617648065411272]</td>\n",
       "      <td>column_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>[-0.2404536341145487]</td>\n",
       "      <td>column_12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>[0.09897461593897172]</td>\n",
       "      <td>column_13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>[0.04787156659728709]</td>\n",
       "      <td>column_14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>[0.0048361143149361135]</td>\n",
       "      <td>column_15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>[0.01358712676757632]</td>\n",
       "      <td>column_16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[0.027907752625147244]</td>\n",
       "      <td>column_17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>[0.01396940600306937]</td>\n",
       "      <td>column_18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>[0.008994056181625932]</td>\n",
       "      <td>column_19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>[0.00027045473560933684]</td>\n",
       "      <td>column_20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>[-0.8067365757807152]</td>\n",
       "      <td>column_21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>[0.08959229642011343]</td>\n",
       "      <td>column_22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>[0.11042883111555056]</td>\n",
       "      <td>column_23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>[0.02008500742190334]</td>\n",
       "      <td>column_24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>[0.08738051968860368]</td>\n",
       "      <td>column_25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>[0.4546369795441018]</td>\n",
       "      <td>column_26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[0.5710622320611318]</td>\n",
       "      <td>column_27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>[0.21566337744273006]</td>\n",
       "      <td>column_28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[0.14659217711614275]</td>\n",
       "      <td>column_29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>[0.044586210478352636]</td>\n",
       "      <td>column_30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>[3.9825785296130567]</td>\n",
       "      <td>target</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        coef    columns\n",
       "0      [-0.9322995823945017]   column_1\n",
       "1      [0.04042948575978604]   column_2\n",
       "2       [-0.128009780647603]   column_3\n",
       "3     [0.004780303590661737]   column_4\n",
       "4     [0.052425970630658396]   column_5\n",
       "5      [0.17018243326356014]   column_6\n",
       "6      [0.24060948019310577]   column_7\n",
       "7      [0.12136968739178541]   column_8\n",
       "8      [0.04663020525891181]   column_9\n",
       "9     [0.011630525800464676]  column_10\n",
       "10    [-0.00617648065411272]  column_11\n",
       "11     [-0.2404536341145487]  column_12\n",
       "12     [0.09897461593897172]  column_13\n",
       "13     [0.04787156659728709]  column_14\n",
       "14   [0.0048361143149361135]  column_15\n",
       "15     [0.01358712676757632]  column_16\n",
       "16    [0.027907752625147244]  column_17\n",
       "17     [0.01396940600306937]  column_18\n",
       "18    [0.008994056181625932]  column_19\n",
       "19  [0.00027045473560933684]  column_20\n",
       "20     [-0.8067365757807152]  column_21\n",
       "21     [0.08959229642011343]  column_22\n",
       "22     [0.11042883111555056]  column_23\n",
       "23     [0.02008500742190334]  column_24\n",
       "24     [0.08738051968860368]  column_25\n",
       "25      [0.4546369795441018]  column_26\n",
       "26      [0.5710622320611318]  column_27\n",
       "27     [0.21566337744273006]  column_28\n",
       "28     [0.14659217711614275]  column_29\n",
       "29    [0.044586210478352636]  column_30\n",
       "30      [3.9825785296130567]     target"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np  # kept: numpy is not imported in the notebook's first import cell\n",
    "\n",
    "# Inspect the fitted model: intercept and raw coefficient matrix\n",
    "print(lr.intercept_)\n",
    "print(lr.coef_)\n",
    "\n",
    "# Pair every feature name with its coefficient for easier reading.\n",
    "# lr.coef_ is 2-D (one row for this binary model), so flatten it: the 'coef' column\n",
    "# then holds plain floats instead of 1-element arrays and can be sorted or plotted.\n",
    "pd.DataFrame({'coef': lr.coef_.ravel(), 'columns': X_train.columns})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "333c6c8d-f096-4ce4-a7f0-12af685cdf18",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      0.99      1.00       110\n",
      "           1       0.98      1.00      0.99        61\n",
      "\n",
      "    accuracy                           0.99       171\n",
      "   macro avg       0.99      1.00      0.99       171\n",
      "weighted avg       0.99      0.99      0.99       171\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# Score the held-out test set with the fitted model:\n",
    "# class probabilities first, then the hard target labels\n",
    "y_test_prob = lr.predict_proba(X_test)\n",
    "y_test_pred = lr.predict(X_test)\n",
    "\n",
    "# Summary report with per-class precision / recall / F1\n",
    "test_report = classification_report(y_test, y_test_pred)\n",
    "print(test_report)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "48cc4e75-a1ef-4748-ad8a-5868c8c05890",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression(C=1, penalty='l1', solver='liblinear')\n",
      "1.0\n",
      "{'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "# Hyper-parameter tuning with an exhaustive grid search\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Candidate settings: regularisation type and inverse regularisation strength\n",
    "parameters = dict(\n",
    "    penalty=('l1', 'l2'),\n",
    "    C=(0.01, 0.1, 1, 10),\n",
    ")\n",
    "\n",
    "# Note: this rebinds lr to a new estimator object, replacing the previously assigned lr\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "lr_search = GridSearchCV(estimator=lr, param_grid=parameters, scoring='accuracy', cv=5)\n",
    "lr_search.fit(X_train, y_train)\n",
    "\n",
    "# Print the best estimator, its score, and its parameter combination (one per line)\n",
    "print(\n",
    "    lr_search.best_estimator_,\n",
    "    lr_search.best_score_,\n",
    "    lr_search.best_params_,\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "1db8caf2-9871-4f43-afcf-3b628548510b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(C=1, penalty=&#x27;l1&#x27;, solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression(C=1, penalty=&#x27;l1&#x27;, solver=&#x27;liblinear&#x27;)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(C=1, penalty='l1', solver='liblinear')"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build an L1-penalised logistic regression with the liblinear solver,\n",
    "# then fit it on the training split.\n",
    "lr = LogisticRegression(\n",
    "    C=1,\n",
    "    penalty='l1',\n",
    "    solver='liblinear',\n",
    ")\n",
    "# Keep fit() as the last expression so the fitted estimator is displayed.\n",
    "lr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "d20ef727-77d9-4757-941c-c0dfa5f36391",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00       110\n",
      "           1       1.00      1.00      1.00        61\n",
      "\n",
      "    accuracy                           1.00       171\n",
      "   macro avg       1.00      1.00      1.00       171\n",
      "weighted avg       1.00      1.00      1.00       171\n",
      "\n",
      "[[110   0]\n",
      " [  0  61]]\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the fitted model on the test split.\n",
    "# classification_report and confusion_matrix are imported in the notebook's\n",
    "# first code cell, so they are not re-imported here.\n",
    "\n",
    "# Predicted class labels and predicted class probabilities for the test set.\n",
    "y_test_pred = lr.predict(X_test)\n",
    "# NOTE(review): y_test_prob is not used in this cell; presumably kept for\n",
    "# probability-based metrics such as roc_curve -- confirm or remove.\n",
    "y_test_prob = lr.predict_proba(X_test)\n",
    "\n",
    "# Summary classification report from classification_report.\n",
    "print(classification_report(y_test, y_test_pred))\n",
    "\n",
    "# Confusion matrix (rows: true labels, columns: predicted labels).\n",
    "print(confusion_matrix(y_test, y_test_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5df1cad-cad2-4257-9586-fbc011388614",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
